#### TABLE 2, ROW 3 (10-Fold Cross-Validation) ####
library(radiant)
##### RANDOM FOREST ####
# Set the working directory. The relative paths used in this script (the input
# CSV read here and the .tex file written at the end) are resolved against it.
setwd("data/analysis")

# Import the pooled data set.
all_data <- read.csv("pooled_data.csv")

# Store the categorical codes as factors. The *_F columns are added next to
# their source columns; TNRFU (the response modelled later) is converted in
# place.
all_data$pid7_F <- factor(all_data$PartyID7)
all_data$ATTEND_F <- factor(all_data$ATTEND)
all_data$INCOME_F <- factor(all_data$INCOME)
all_data$EDUC_F <- factor(all_data$EDUC)
all_data$EMPLOY_F <- factor(all_data$EMPLOY)
all_data$HOUSING_F <- factor(all_data$HOUSING)
all_data$TNRFU <- factor(all_data$TNRFU)

# Drop rows with a missing value in TNRFU, pid7_F, or ATTEND_F.
all_data <- filter(all_data, !is.na(TNRFU), !is.na(pid7_F), !is.na(ATTEND_F))

# Drop duplicates: keep the first row for every distinct combination of the
# columns listed below, retaining all other columns of that row.
# NOTE(review): AGE is used as a predictor later but is not part of this key,
# so rows that differ only in AGE are collapsed -- confirm this is intended.
all_data <- all_data |>
  distinct(
    TNRFU, GENDER2, EDUC_F, EMPLOY_F, HOME_TYPE5,
    INCOME_F, STATE, MARITAL6, INTERNET, PHONESERVICE5, ATTEND_F,
    METRO, pid7_F, HOUSING_F, HHSIZE, RACE,
    .keep_all = TRUE # spelled out: `T` is an ordinary, reassignable variable
  )
# Fix the RNG state so the random split drawn next is reproducible.
set.seed(78712)

# Row indices of a random hold-out sample containing one tenth of the rows
# (rounded down). The model is trained on the complement, all_data[-test, ],
# i.e. roughly 90% of the sample.
test <- sample(x = nrow(all_data), size = floor(nrow(all_data) / 10))

# Fit a classification random forest for TNRFU (level "1" is the level of
# interest) on every row that is not in the hold-out index vector `test`.
rf.nrfu <- rforest(
  dataset = all_data[-test, ],
  rvar = "TNRFU",
  evar = c(
    "AGE", "GENDER2", "RACE", "EDUC_F",
    "MARITAL6", "EMPLOY_F", "INCOME_F",
    "METRO", "INTERNET", "pid7_F", "ATTEND_F",
    "HOUSING_F", "HOME_TYPE5", "PHONESERVICE5",
    "HHSIZE", "STATE"
  ),
  type = "classification",
  lev = "1",
  num.trees = 500,
  seed = 78712
)

# 10-fold cross-validation of the fitted forest over a tuning grid:
# mtry is held at 4, min.node.size takes 1, 2, 3 and num.trees takes
# 400, 500, ..., 800.
cvfor <- cv.rforest(
  rf.nrfu,
  K = 10,
  mtry = 4,
  min.node.size = c(1, 2, 3),
  num.trees = seq(400, 800, by = 100)
)

# Table 2, Row 3 quantity: one minus the top-left cell of the CV results.
# NOTE(review): presumably the first row is the best-performing configuration
# and the first column its mean performance metric -- confirm against the
# cv.rforest documentation.
1 - cvfor[1, 1]

# Save the Table 2, Row 3 quantity as a one-cell LaTeX table.
# xtable() and write_file() are namespace-qualified so this step does not
# depend on the xtable and readr packages having been attached elsewhere in
# the session. print() echoes the LaTeX to the console and its (invisible)
# return value is what gets written to the file.
data.frame(CV_10_fold = 1 - cvfor[1, 1]) |>
  xtable::xtable(digits = 3) |>
  print(include.rownames = FALSE) |>
  readr::write_file(file = "../../results/Table_2_pt5.tex")
